In [9]:
import pandas as pd
import matplotlib.pyplot as plt
import numpy as np
In [10]:
# Read the global temperature CSV into `df` and preview its first five rows.
df = pd.read_csv(filepath_or_buffer='global_data.csv')
df.head(n=5)
Out[10]:
year avg_temp
0 1750 8.72
1 1751 7.98
2 1752 5.78
3 1753 8.39
4 1754 8.47
In [ ]:
# Dimensions of the dataframe as a (rows, columns) tuple.
df.shape
In [ ]:
# Data type of every column, returned as a Series indexed by column name.
df.dtypes
In [ ]:
# Print a concise summary of the dataframe: index range, column names,
# non-null count per column, dtypes and memory usage.
df.info()
In [ ]:
# Number of distinct values in each column (NaN is not counted by default).
df.nunique()
In [ ]:
# Descriptive statistics (count, mean, std, min, quartiles, max) for each
# numeric column.
df.describe()
In [ ]:
# Re-display the dataframe dimensions as a (rows, columns) tuple.
df.shape
In [ ]:
# Count missing values per column.
# Fix: this cell referenced `red_df`, which is never defined in this notebook
# (NameError on a fresh run); the frame loaded above is `df`.
df.isnull().sum()
In [ ]:
# Count fully duplicated rows.
# Fix: this cell referenced `white_df`, which is never defined in this notebook
# (NameError on a fresh run); the frame loaded above is `df`.
df.duplicated().sum()
In [ ]:
# Number of rows that are exact duplicates of an earlier row.
# The vectorised sum is cast back to a plain Python int for display.
int(df.duplicated().sum())
In [ ]:
# Fill missing values with the mean of each numeric column.
# Fix: the previous call replaced every NaN with the literal string
# 'FILL VALUE' instead of a mean.
# The filled frame is only displayed here; `df` itself is not modified.
df.fillna(value=df.mean(numeric_only=True))
In [ ]:
# Replace NaNs in place with the mean of each numeric column.
# numeric_only=True keeps non-numeric columns out of the mean computation;
# without it, recent pandas versions raise TypeError when such columns exist.
# The unused `mean` local was removed: it was never read, and it indexed a
# 'texture_mean' column that the global_data.csv preview does not show.
df.fillna(df.mean(numeric_only=True), inplace=True)
In [ ]:
# Remove duplicated rows, keeping the first occurrence of each.
# inplace=True mutates `df` directly, so re-running earlier cells that
# displayed `df` will show the de-duplicated data.
df.drop_duplicates(inplace=True)
In [ ]:
# Verify the clean-up: count the rows that are still exact duplicates.
# The vectorised sum is cast back to a plain Python int for display.
int(df.duplicated().sum())
In [ ]:
# Bar chart of column totals over the rows from position 196 onward,
# skipping the first column.
# NOTE(review): 196 is presumably the first row of the last month — confirm
# against the dataset this cell was written for.
df.iloc[196:, 1:].sum().plot.bar();
In [ ]:
# Pie chart of the per-column averages.
# numeric_only=True restricts the mean to numeric columns; recent pandas
# versions raise TypeError if a non-numeric column (other cells in this
# notebook filter a 'week' column against date strings) is included.
df.mean(numeric_only=True).plot(kind='pie');
In [ ]:
# Select the row(s) whose 'week' equals 2016-03-13, then bar-chart the first
# match, leaving out its first column.
sales = df.loc[df['week'] == '2016-03-13']
sales.iloc[0].iloc[1:].plot.bar();
In [ ]:
# Sales for the latest 3-month period: keep rows with week >= 2017-12-01,
# total every column except the first, and draw the totals as a pie chart.
last_three_months = df.loc[df['week'] >= '2017-12-01']
last_three_months.iloc[:, 1:].sum().plot.pie()
In [ ]:
# Rows whose 'week' value is exactly 2016-03-13.
df.loc[df['week'].eq('2016-03-13')]
In [ ]:
# Row(s) where 'store C' is at its minimum value, i.e. its worst week.
df.loc[df['store C'].eq(df['store C'].min())]
In [ ]:
# Column totals for the most recent 3-month period (week >= 2017-12-01).
last_three_months = df.loc[df['week'] >= '2017-12-01']
# The first column (week) is left out of the sum.
last_three_months.iloc[:, 1:].sum()
In [ ]:
# Scatter plot of energy_output against temperature.
df.plot.scatter(x='temperature', y='energy_output');
In [ ]:
# Histogram of the 'humidity' column; the trailing semicolon hides the
# returned Axes repr in the notebook output.
# NOTE(review): the global_data.csv preview shows only year/avg_temp columns —
# confirm which dataset `df` is expected to hold here.
df['humidity'].hist();
In [ ]:
# Box plot of the 'temperature' column (only this one column is drawn).
df['temperature'].plot.box();
In [ ]:
# Frequency of each distinct value in the 'education' column.
# NOTE(review): `df_census` is not defined in the visible cells — confirm
# where it is loaded.
df_census['education'].value_counts()
In [ ]:
# Bar chart of how often each 'education' value occurs.
df_census['education'].value_counts().plot.bar();
In [ ]:
# Pie chart of 'workclass' frequencies; value_counts() supplies the slice
# sizes, and the figure is drawn at 8x8 inches.
df_census['workclass'].value_counts().plot.pie(figsize=(8, 8));
In [ ]:
# Scatter plot of concavity_se against concavity_mean.
df.plot.scatter(x='concavity_mean', y='concavity_se');
In [ ]:
# Box plot of the 'concave points_worst' column.
df['concave points_worst'].plot.box()

Project¶

In [ ]:
# Import the global temperature data.
global_temp = pd.read_csv('global_data.csv')
# Import the city temperature data; per the author's note this file holds a
# single city (Hansa Hamburg) over the years.
city_temp = pd.read_csv('city_data1.csv')
In [ ]:
# 10-row rolling mean of the avg_temp column for each dataset. The first nine
# entries of each result are NaN because a full 10-row window is required.
# Fix: the frames are named `global_temp` / `city_temp` where they are loaded;
# the previous `globaltemp` / `citytemp` spellings raised NameError.
glb_mv_avg = global_temp['avg_temp'].rolling(10).mean()
local_mv_avg = city_temp['avg_temp'].rolling(10).mean()
In [ ]:
# Compare the moving averages of the global series and the local (Hamburg)
# series on a single chart.
# The font size is set before any text is created so that it applies to this
# figure; updating rcParams after the labels exist does not restyle them.
plt.rcParams.update({'font.size': 10})
plt.plot(global_temp['year'], glb_mv_avg, label='Global')
plt.plot(city_temp['year'], local_mv_avg, label='Hamburg')
plt.legend()
plt.xlabel("Years")
plt.ylabel("Temperature (°C)")
# Fix: the title now names both plotted series; it previously described
# only Hamburg.
plt.title("Global vs. Hamburg Moving-Average Temperature")
plt.show()
In [ ]:
# Moving average of the local (Hamburg) series on its own.
# The font size is set before any text is created so that it applies to this
# figure; updating rcParams after the labels exist does not restyle them.
plt.rcParams.update({'font.size': 10})
plt.plot(city_temp['year'], local_mv_avg, label='Hamburg')
plt.legend()
plt.xlabel("Years")
plt.ylabel("Temperature (°C)")
plt.title("Hansa Hamburg Average Temperature")
plt.show()
In [ ]:
# Moving average of the global series on its own.
# The font size is set before any text is created so that it applies to this
# figure; updating rcParams after the labels exist does not restyle them.
plt.rcParams.update({'font.size': 10})
plt.plot(global_temp['year'], glb_mv_avg, label='Global')
plt.legend()
plt.xlabel("Years")
plt.ylabel("Temperature (°C)")
# Fix: this chart shows the global series, but it was titled (and commented)
# as the Hamburg chart.
plt.title("Global Average Temperature")
plt.show()
In [ ]:
# Scatter plot of energy_output against temperature.
df.plot.scatter(x='temperature', y='energy_output');
In [ ]:
 
In [ ]:
# NOTE(review): `sns` is not imported in the visible imports cell; this cell
# assumes `import seaborn as sns` has already been run — confirm or add it
# to the imports cell.
# Fix: `titanic` was used here before the later cell that reads titanic.csv
# had defined it, so the data is loaded in this cell to make a top-to-bottom
# run work.
titanic = pd.read_csv('titanic.csv')
# Categorical plot of Age for each passenger class.
sns.catplot(x='Pclass', y='Age', data=titanic)
In [ ]:
# Load the Titanic passenger data from titanic.csv.
titanic = pd.read_csv('titanic.csv')
# Pairwise relationship grid for the columns of `df`.
# NOTE(review): this plots `df`, not the `titanic` frame loaded on the line
# above — confirm which frame was intended.
sns.pairplot(df)
In [ ]:
# Pairwise relationship grid for `df`, coloured by the "Day" column.
# NOTE(review): the global_data.csv preview shows only year/avg_temp, with no
# "Day" column — confirm which dataset `df` is expected to hold here.
sns.pairplot(df, hue="Day")  
In [ ]:
# Boxen (letter-value) plot of Age for each passenger class.
# Fix: the result was previously assigned back to `titanic`, replacing the
# DataFrame with the returned Axes object and breaking every later cell that
# passes `data=titanic`. The Axes now gets its own name.
boxen_ax = sns.boxenplot(x="Pclass", y="Age", data=titanic)
In [ ]:
# Categorical plot of Age per passenger class, coloured by Sex.
sns.catplot(data=titanic, x='Pclass', y='Age', hue='Sex')
In [ ]:
# Box plot of Age per passenger class, split by Sex.
sns.boxplot(data=titanic, x='Pclass', y='Age', hue='Sex')
In [ ]:
# Joint distribution of Age and Fare drawn as kernel density estimates.
# Fix: x and y are passed by keyword. Current seaborn releases accept only
# `data` positionally, so the positional form raises TypeError.
sns.jointplot(data=titanic, x='Age', y='Fare', kind='kde')
In [ ]:
# Joint distribution of Age and Fare drawn with hexagonal bins.
# Fix: x and y are passed by keyword. Current seaborn releases accept only
# `data` positionally, so the positional form raises TypeError.
sns.jointplot(data=titanic, x='Age', y='Fare', kind='hex')